###########################################################################################################
# Replication for county-level analysis for Bavaria presented in Table A24 (sorting after resettlement)
# Charnysh and Schaub, Migration and Social Change: Evidence from post-WWII Displacement in Germany, JOP
# R version 4.3.1 (2023-06-16) 
###########################################################################################################

rm(list = ls())
library(openxlsx)
library(stargazer)
library(sandwich)
library(lmtest)


dat3950<-read.xlsx("county_data/dataset19391950.xlsx") #file that includes distances and gender
dat3950$Evang1950[which(dat3950$KREIS50=="5536000")]<-13184  ##Correct Evang1950 for Landkreis Münster based on Gemeindestatistik Nordheim Westfalen, pages 1-23 


mig6061<-read.xlsx("county_data/migration196061merged.xlsx")

dat<-merge(dat3950, mig6061, by="Kennziffer")


dat$pfemale1939<-dat$Women1939/dat$Tot1939 

dat$ShareExp1950<-dat$Heimatvertriebene1950/dat$Pop1950

dat$ShareCath1939<-dat$Katholische1939/dat$Population1939
dat$ShareEvang1939<-dat$Evangelische1939/dat$Population1939
dat$ShareOther1939<-(dat$Population1939-dat$Evangelische1939-dat$Katholische1939)/dat$Population1939

dat$Frac1939<-1-(dat$ShareCath1939^2+dat$ShareEvang1939^2+dat$ShareOther1939^2)
summary(dat$Frac1939)

dat$ShareAgr1939<-dat$AgricultureInsgesamt1939/dat$Erwerbspersonen_Total1939 
dat$ShareDestroyed<-dat$KriegsschaedenNWohnungen/dat$NormalWohnungen 
dat$lnDistEastBorder<-log(dat$distance_to_borderkm)
dat$ShareCath1950<-dat$Kath1950/dat$Pop1950

dat[which(is.na(dat$KriegsschaedenNWohnungen)),] #Two places that are NAs in Braun's dataset.


dat$OtherReligion1950<-dat$Pop1950-dat$Kath1950-dat$Evang1950
summary(dat$OtherReligion1950)

dat$Frac1950<-1-((dat$Kath1950/dat$Pop1950)^2+(dat$Evang1950/dat$Pop1950)^2+(dat$OtherReligion1950/dat$Pop1950)^2)
summary(dat$Frac1950) 

dat$lnPop1939<-log(dat$Population1939)
dat$PopDens1939<-dat$Population1939/dat$AREA50
dat$DeltaFrac<-dat$Frac1950-dat$Frac1939

#Calculate migration variables
dat$ptarrived1960<-dat$Zugezogene60/dat$Population1960 
dat$ptleft1960<-dat$Fortgezogene60/dat$Population1960 

dat$PtSurplusMig195061<-dat$SurplusInOutM19501961/dat$Pop1950 #Proportion of the original population that migrated in the next 10 years. 


################################################################################
############# Table A24: Religious diversity and in/out migration  #############
################################################################################

reg1<-lm(PtSurplusMig195061~DeltaFrac+Frac1939, data=dat)

reg2<-lm(PtSurplusMig195061~DeltaFrac+Frac1939+ShareExp1950+pfemale1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+PopDens1939+lnPop1939, data=dat)

reg3<-lm(ptarrived1960~DeltaFrac+Frac1939, data=dat)
 
reg4<-lm(ptarrived1960~DeltaFrac +Frac1939+ShareExp1950+pfemale1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+PopDens1939+lnPop1939, data=dat)

reg5<-lm(ptleft1960~DeltaFrac+Frac1939, data=dat)

reg6<-lm(ptleft1960~DeltaFrac +Frac1939+ShareExp1950+pfemale1939+ShareAgr1939+ShareDestroyed+lnDistEastBorder+PopDens1939+lnPop1939, data=dat)


ses1 <- list(coeftest(reg1, vcov = vcovHC(reg1, type="HC1"))[,2], coeftest(reg2, vcov = vcovHC(reg2, type="HC1"))[,2], 
             coeftest(reg3, vcov = vcovHC(reg3, type="HC1"))[,2], coeftest(reg4, vcov = vcovHC(reg4, type="HC1"))[,2],
             coeftest(reg5, vcov = vcovHC(reg5, type="HC1"))[,2], coeftest(reg6, vcov = vcovHC(reg6, type="HC1"))[,2])

pvals1 <- list(coeftest(reg1, vcov = vcovHC(reg1, type="HC1"))[,4], coeftest(reg2, vcov = vcovHC(reg2, type="HC1"))[,4],  
               coeftest(reg3, vcov = vcovHC(reg3, type="HC1"))[,4], coeftest(reg4, vcov = vcovHC(reg4, type="HC1"))[,4],
               coeftest(reg5, vcov = vcovHC(reg5, type="HC1"))[,4], coeftest(reg6, vcov = vcovHC(reg6, type="HC1"))[,4])


means_dv <- c(rep(round(mean(dat$PtSurplusMig195061,na.rm=T),2), 2),rep(round(mean(dat$ptarrived1960,na.rm=T),2), 2), rep(round(mean(dat$ptleft1960,na.rm=T),2), 2))

sds_dv <- c(rep(round(sd(dat$PtSurplusMig195061,na.rm=T),2), 2), rep(round(sd(dat$ptarrived1960,na.rm=T),2), 2),rep(round(sd(dat$ptleft1960,na.rm=T),2), 2))

mean_row <- c("DV mean", means_dv)
sd_row <- c("DV sd", sds_dv)


stargazer(
  reg1, reg2, reg3, reg4, reg5, reg6,
  se = ses1, p = pvals1, digits = 2,
  covariate.labels = c(
    "Delta diversity", "Fractionalization (1939)", "Share expellees", 
    "Share female (1939)", "Share in agriculture (1939)", "Share destroyed", 
    "Ln dist to the Eastern border", "Pop density (1939)", "Ln population (1939)"
  ),
  omit = c("Constant"), omit.stat = c("ser", "f", "rsq"), no.space = TRUE,
  column.labels = c("Net in/out migration 1950-1961", "In-migration 1960", "Out-migration 1960"),
  title = "Net in/out migration. Robust SEs in parentheses.",
  add.lines = list(mean_row, sd_row)  # Add the custom rows
)